#################################################################################################################### #
# Script: The ecology of immigrant naturalisation: a life course approach in the context of institutional conditions #                                                                                                                    #
# Floris Peters, Maarten Vink & Hans Schmeets                                                                        #                                                                                                                    #
# Journal of Ethnic and Migration Studies                                                                            #
######################################################################################################################


################
# Introduction #
################

# This file provides the syntax used to create all the tables and figures in the paper. 
# The dataset contains sensitive, micro level information. As such, for privacy reasons the data is only available to individuals employed at or affiliated to Statistics Netherlands. 
# The dataset can be found at the following location on the network of Statistics Netherlands: \\cbsp.nl\Productie\Projecten\SAL\209253UM_FP_SEC1\Werk\Floris\PhD\JEMS_naturalisation 

#######################################################################################################################
#######################################################################################################################

#############
# Variables #
#############

# ID
# (Individual identification number)

# START
# (Time vector - start of a given time period)

# STOP
# (Time vector - end of a given time period)

# EVENT
# (Acquiring Dutch citizenship)
# [0] The event does not occur during this time period; 
# [1] The event occurs at the end of this time period

# IMMIGRATIONYEAR
# (Year of first immigration to the Netherlands)

# GENDER
# [0] Female; 
# [1] Male

# AGEARRIVAL
# (Age at the moment of migration in years)

# AGECAT 
# (Age at the moment of migration in categories) 
# [1] 15-17 years; 
# [2] 18-24 years; 
# [3] 25-34 years; 
# [4] 35-44 years; 
# [5] 45-54 years; 
# [6] 55-64 years; 
# [7] 65-74 years; 
# [8]: >74 years

# PARTNER
# [1] No partner; 
# [2] Native Dutch partner; 
# [3] Foreign born foreign partner; 
# [4] Year naturalization partner; 
# [5] 1 year after naturalization partner; 
# [6] 2 years after naturalization partner; 
# [7] 3 years after naturalization partner; 
# [8] >3 years after naturalization partner

# CHILD 
# (Age of the youngest child in the household in years)
# [999] No children in the household

# CHILDREC
# (Children in the household in categories)
# [1]: Children <18 in household; 
# [2]: No children <18 in household

# DUALCIT
# (Dual citizenship toleration in the origin country)
# [0] Country does not exist on January 1 of reference year; 
# [110] Automatic loss of citizenship of origin country; 
# [111] Automatic loss of citizenship of origin country; State is Party to Chapter 1, Strasbourg  Convention
# [112] Automatic loss of citizenship of origin country; State is Party to Chapter 1, Strasbourg Convention and Second Protocol; 
# [210] No automatic loss of citizenship of origin country, but renunciation of citizenship of origin country is possible;
# [211] No automatic loss of citizenship, but renunciation is possible; State is Party to Chapter 1, Strasbourg Convention; 
# [212] Same as 210, but including the Second Protocol; 
# [220] Same as 210, but automatic loss for persons who have acquired citizenship of origin country by naturalization; 
# [310] No automatic loss of citizenship of origin country, and renunciation of citizenship of origin country is not possible;
# [320] Same as 310, but automatic loss for persons who have acquired citizenship of origin country by naturalization; 
# [330] Same as 310, but renunciation is possible only for those who have acquired citizenship of origin by naturalization;
# [999] Rules on loss of citizenship of origin country are unknown

# DUALCITREC
# (Dual citizenship toleration in the origin country in categories)
# [0] No automatic loss dual nationality; 
# [1] Automatic loss dual nationality

# DEVELOPMENT
# (Human Development Index (HDI) score origin country)
  
# DEVCAT
# (Human Development index (HDI) score origin country in categories)
# [1] First quartile; 
# [2] Second quartile; 
# [3] Third quartile; 
# [4] Fourth quartile
  
# STABILITY
# (Kaufmann index score origin country)
  
# STACAT
# (Kaufmann index score origin country in categories)
# [1] First quartile; 
# [2] Second quartile; 
# [3] Third quartile; 
# [4] Fourth quartile
    
# EU
# [0] Not EU country of origin; 
# [1] EU country of origin
    
# EDUCATION
# [1] Low education; 
# [2] Middle education; 
# [3] High education
    
# RUSH
# (Rush into naturalization dummy used in table A5 (only included in dataset "Data_cit_acq_rush"; see Table A5 below)
# [0] not time period between 01-04-2002 and 01-04-2003; 
# [1] time period between 01-04-2002 and 01-04-2003
      
#######################################################################################################################
#######################################################################################################################


###########     
# Table 1 #
###########      

#Upload the dataset (Data_cit_acq_main.sav) to R and load the necessary libraries
library(splines)
library(foreign)
library(dplyr)
dataset_main <- read.csv(file.choose(),header=T,sep=";")

#compute the survival function
dataset_main$surv_main <- Surv(dataset_main$START, dataset_main$STOP, dataset_main$EVENT)
      
#Cox regression: Table 1 - model 1
table_1.1 <- coxph(surv_main ~ GENDER + AGEARRIVAL + as.factor(PARTNER) + CHILD + DUALCITREC + DEVELOPMENT + STABILITY + EU, dataset = dataset_main)
      
#Cox regression: Table 1 - model 2
table_1.2 <- coxph(surv_main ~ GENDER + AGEARRIVAL + as.factor(PARTNER) + CHILD + DUALCITREC + DEVELOPMENT + STABILITY + EU + as.factor(IMMIGRATIONYEAR), dataset = dataset_main)
    
 
#############      
# Figure 1a #
#############      
      
#select cases low development and immigrationcohort
dataset_main <- read.csv(file.choose(),header=T,sep=";")
dataset_low_early <- subset(dataset_main, DEVELOPMENT < 0.69651 & IMMIGRATIONYEAR < 1998)

#compute the survival function
dataset_low_early$surv_main <- Surv(dataset_low_early$START, dataset_low_early$STOP, dataset_low_early$EVENT)

#compute the Kaplan-Meier curve
dataset_low_early$km1.1 <- survfit(dataset_low_early$surv_main ~ 1)
      
#plot the Kaplan-Meier curve
figure_1.1 <- plot(dataset_low_early$km1.1, ylim = c(.2, 1), xlim = c(156, 520), 
                 xlab = "Weeks of residence", ylab = "Proportion not naturalized", 
                 col = 2, lty = 1)

#select cases high development and immigrationcohort
dataset_high_early <- subset(dataset_main, DEVELOPMENT >= 0.69651 & IMMIGRATIONYEAR < 1998)

#compute the survival function
dataset_high_early$surv_main <- Surv(dataset_high_early$START, dataset_high_early$STOP, dataset_high_early$EVENT)

#compute the Kaplan-Meier curve
dataset_high_early$km1.2 <- survfit(dataset_high_early$surv_main ~ 1)

#retain the previous plot
Par(new=T)

#plot the Kaplan-Meier curve
figure_1.2 <- plot(dataset_high_early$km1.2, ylim = c(.2, 1), xlim = c(156, 520), 
                 xlab = "Weeks of residence", ylab = "Proportion not naturalized", 
                 col = 4, lty = 1)

#add legend
legend.txt1.1 <- c("High development", "Low development")
legend("topright", legend = legend.txt1.1, col = c(4,2), lty = c(1,1))


#############      
# Figure 1b #
#############  

#select cases low development and immigrationcohort
dataset_main <- read.csv(file.choose(),header=T,sep=";")
dataset_low_late <- subset(dataset_main, DEVELOPMENT < 0.69651 & IMMIGRATIONYEAR > 1999)

#compute the survival function
dataset_low_late$surv_main <- Surv(dataset_low_late$START, dataset_low_late$STOP, dataset_low_late$EVENT)

#compute the Kaplan-Meier curve
dataset_low_late$km1.3 <- survfit(dataset_low_late$surv_main ~ 1)

#plot the Kaplan-Meier curve
figure_1.3 <- plot(dataset_low_late$km1.3, ylim = c(.2, 1), xlim = c(156, 520), 
                   xlab = "Weeks of residence", ylab = "Proportion not naturalized", 
                   col = 2, lty = 1)

#select cases high development and immigrationcohort
dataset_high_late <- subset(dataset_main, DEVELOPMENT >= 0.69651 & IMMIGRATIONYEAR > 1999)

#compute the survival function
dataset_high_late$surv_main <- Surv(dataset_high_late$START, dataset_high_late$STOP, dataset_high_late$EVENT)

#compute the Kaplan-Meier curve
dataset_high_late$km1.4 <- survfit(dataset_high_late$surv_main ~ 1)

#retain the previous plot
Par(new=T)

#plot the Kaplan-Meier curve
figure_1.4 <- plot(dataset_high_late$km1.4, ylim = c(.2, 1), xlim = c(156, 520), 
                   xlab = "Weeks of residence", ylab = "Proportion not naturalized", 
                   col = 4, lty = 1)

#add legend
legend.txt1.2 <- c("High development", "Low development")
legend("topright", legend = legend.txt1.2, col = c(4,2), lty = c(1,1))


############     
# Figure 2 #
############

#select cases early immigrationcohort
dataset_main <- read.csv(file.choose(),header=T,sep=";")
dataset_early <- subset(dataset_main, IMMIGRATIONYEAR < 1998)

#compute the survival function
dataset_early$surv_main <- Surv(dataset_early$START, dataset_early$STOP, dataset_early$EVENT)

#compute the Kaplan-Meier curve
dataset_early$km2.1 <- survfit(dataset_early$surv_main ~ 1)

#plot the Kaplan-Meier curve
figure_2.1 <- plot(dataset_early$km2.1, ylim = c(.2, 1), xlim = c(156, 520), 
                   xlab = "Weeks of residence", ylab = "Proportion not naturalized", 
                   col = 2, lty = 1)

#select cases middle immigrationcohort
dataset_mid <- subset(dataset_main, IMMIGRATIONYEAR >= 1998 & IMMIGRATIONYEAR < 2000)

#compute the survival function
dataset_mid$surv_main <- Surv(dataset_mid$START, dataset_mid$STOP, dataset_mid$EVENT)

#compute the Kaplan-Meier curve
dataset_mid$km2.2 <- survfit(dataset_mid$surv_main ~ 1)

#retain the previous plot
Par(new=T)

#plot the Kaplan-Meier curve
figure_2.2 <- plot(dataset_mid$km2.2, ylim = c(.2, 1), xlim = c(156, 520), 
                   xlab = "Weeks of residence", ylab = "Proportion not naturalized", 
                   col = 3, lty = 1)

#select cases late immigrationcohort
dataset_late <- subset(dataset_main, IMMIGRATIONYEAR >= 2000)

#compute the survival function
dataset_late$surv_main <- Surv(dataset_late$START, dataset_late$STOP, dataset_late$EVENT)

#compute the Kaplan-Meier curve
dataset_late$km2.3 <- survfit(dataset_late$surv_main ~ 1)

#retain the previous plot
Par(new=T)

#plot the Kaplan-Meier curve
figure_2.3 <- plot(dataset_late$km2.3, ylim = c(.2, 1), xlim = c(156, 520), 
                   xlab = "Weeks of residence", ylab = "Proportion not naturalized", 
                   col = 4, lty = 1)

#add legend
legend.txt2 <- c("Cohort 1995-1997", "Cohort 1998-1999", "Cohort 2000-2002")
legend("topright", legend = legend.txt2, col = c(2,3,4), lty = c(1,1,1))


############
# TABLE A1 #
############

# Load main dataset
dataset_main <- read.csv(file.choose(),header=T,sep=";")

# Identify last case per individual
last_row <- dataset_main %>%
  group_by(ID) %>%
  arrange(ID) %>%
  filter(tail(1))

# Extract last case per individual
last_row = subset(last_row, select = c(ID,YEAR))

# Create dummy variable indicating last case per individual
last_case <- 1

# Attach dummy variable to last case
last_row <- cbind(cbind, last_row,last_case)

# Merge last case with main dataset and sort by ID and YEAR
dataset_main <- merge(dataset_main, last_row, by = c("ID","YEAR"), all = T)
dataset_main <- dataset_main[with(dataset_main, order(ID,YEAR)),]

# Select last case per individual
dataset_last_case <- subset(dataset_main, last_case == 1)

# Descriptives
# Frequencies
table_gender <- table(dataset_last_case$GENDER, dataset_last_case$EVENT)
table_age <- table(dataset_last_case$AGECAT, dataset_last_case$EVENT)
table_partner <- table(dataset_last_case$PARTNER, dataset_last_case$EVENT)
table_child <- table(dataset_last_case$CHILDREC, dataset_last_case$EVENT)
table_dualcit <- table(dataset_last_case$DUALCITREC, dataset_last_case$EVENT)
table_hdi <- table(dataset_last_case$DEVCAT, dataset_last_case$EVENT)
table_kaufmann <- table(dataset_last_case$STACAT, dataset_last_case$EVENT)
table_eu <- table(dataset_last_case$EU, dataset_last_case$EVENT)
table_cohorts <- table(dataset_last_case$IMMIGRATIONYEAR, dataset_last_case$EVENT)

# Merge frequencies into descriptives table
table_A1.1 <- rbind(table_gender,table_age,table_partner,table_child,table_dualcit,table_hdi,
                    table_kaufmann,table_eu,table_cohorts)
# Rates
rate_gender <- prop.table(table(dataset_last_case$GENDER, dataset_last_case$EVENT), 1)
rate_age <- prop.table(table(dataset_last_case$AGECAT, dataset_last_case$EVENT), 1)
rate_partner <- prop.table(table(dataset_last_case$PARTNER, dataset_last_case$EVENT), 1)
rate_child <- prop.table(table(dataset_last_case$CHILDREC, dataset_last_case$EVENT), 1)
rate_dualcit <- prop.table(table(dataset_last_case$DUALCITREC, dataset_last_case$EVENT), 1)
rate_hdi <- prop.table(table(dataset_last_case$DEVCAT, dataset_last_case$EVENT), 1)
rate_kaufmann <- prop.table(table(dataset_last_case$STACAT, dataset_last_case$EVENT), 1)
rate_eu <- prop.table(table(dataset_last_case$EU, dataset_last_case$EVENT), 1)
rate_cohorts <- prop.table(table(dataset_last_case$IMMIGRATIONYEAR, dataset_last_case$EVENT), 1)
      
# Merge rates into descriptives table
table_A1.2 <- rbind(rate_gender,rate_age,rate_partner,rate_child,rate_dualcit,rate_hdi,
                  rate_kaufmann,rate_eu,rate_cohorts)
      
# Merge into overall table
table_A1 <- cbind(table_A1.1,table_A1.2)


############
# TABLE A2 #
############     

#select cases low development
dataset_main <- read.csv(file.choose(),header=T,sep=";")
dataset_low <- subset(dataset_main, DEVELOPMENT < 0.69442)

#compute the survival function
dataset_low$surv_main <- Surv(dataset_low$START, dataset_low$STOP, dataset_low$EVENT)

#Cox regression: Table A2a
table_A2 <- coxph(surv_main ~ GENDER + AGEARRIVAL + as.factor(PARTNER) + CHILD + DUALCITREC + as.factor(IMMIGRATIONYEAR), dataset = dataset_low)


############
# TABLE A3 #
############ 

#select cases high development
dataset_main <- read.csv(file.choose(),header=T,sep=";")
dataset_high <- subset(dataset_main, DEVELOPMENT >= 0.69442)

#compute the survival function
dataset_high$surv_main <- Surv(dataset_high$START, dataset_high$STOP, dataset_high$EVENT)

#Cox regression: Table A2a
table_A3 <- coxph(surv_main ~ GENDER + AGEARRIVAL + as.factor(PARTNER) + CHILD + DUALCITREC + as.factor(IMMIGRATIONYEAR), dataset = dataset_high)

    
############
# TABLE A4 #
############      

# Load main dataset
dataset_main <- read.csv(file.choose(),header=T,sep=";")

# Identify last case per individual
last_row <- dataset_main %>%
  group_by(ID) %>%
  arrange(ID) %>%
  filter(tail(1))

# Extract last case per individual
last_row = subset(last_row, select = c(ID,YEAR))

# Create dummy variable indicating last case per individual
last_case <- 1

# Attach dummy variable to last case
last_row <- cbind(cbind, last_row,last_case)

# Merge last case with main dataset and sort by ID and YEAR
dataset_main <- merge(dataset_main, last_row, by = c("ID","YEAR"), all = T)
dataset_main <- dataset_main[with(dataset_main, order(ID,YEAR)),]

# Select last case per individual
dataset_last_case <- subset(dataset_main, last_case == 1)

# Descriptives
desc_gender <- table(dataset_last_case$GENDER)
desc_age <- mean(dataset_last_case$AGEARRIVAL)
desc_partner <- table(dataset_last_case$PARTNER)
desc_child <- table(dataset_last_case$CHILD)  
desc_dualcit <- table(dataset_last_case$DUALCITREC)
desc_hdi <- mean(dataset_last_case$DEVELOPMENT)
desc_kaufmann <- mean(dataset_last_case$STABILITY)
desc_eu <- table(dataset_last_case$EU)
  
# Merge rates into descriptives table
table_A4.1 <- rbind(desc_gender,desc_age,desc_partner,desc_child,desc_dualcit,desc_hdi,
                    desc_kaufmann,desc_eu)

# select subsample with valid cases on education and immigrationyear >= 2000
dataset_main_late_edu <- subset(dataset_main, IMMIGRATIONYEAR > 1999 & EDUCATION >= 1)

# Identify last case per individual
last_row_late_edu <- dataset_main_late_edu %>%
  group_by(ID) %>%
  arrange(ID) %>%
  filter(tail(1))

# Extract last case per individual
last_row_late_edu = subset(last_row_late_edu, select = c(ID,YEAR))

# Create dummy variable indicating last case per individual
last_case_late_edu <- 1

# Attach dummy variable to last case
last_row_late_edu <- cbind(cbind, last_row_late_edu,last_case_late_edu)

# Merge last case with main dataset and sort by ID and YEAR
dataset_main_late_edu <- merge(dataset_main_late_edu, last_row_late_edu, by = c("ID","YEAR"), all = T)
dataset_main_late_edu <- dataset_main_late_edu[with(dataset_main_late_edu, order(ID,YEAR)),]

# Select last case per individual
dataset_last_case_late_edu <- subset(dataset_main_late_edu, last_case == 1)

# Descriptives
desc_late_edu_gender <- table(dataset_last_case_late_edu$GENDER)
desc_late_edu_age <- mean(dataset_last_case_late_edu$AGEARRIVAL)
desc_late_edu_partner <- table(dataset_last_case_late_edu$PARTNER)
desc_late_edu_child <- table(dataset_last_case_late_edu$CHILD)  
desc_late_edu_dualcit <- table(dataset_last_case_late_edu$DUALCITREC)
desc_late_edu_hdi <- mean(dataset_last_case_late_edu$DEVELOPMENT)
desc_late_edu_kaufmann <- mean(dataset_last_case_late_edu$STABILITY)
desc_late_edu_eu <- table(dataset_last_case_late_edu$EU)
desc_late_edu_education <- table(dataset_last_case_late_edu$EDUCATION)

# Merge rates into descriptives table
table_A4.2 <- rbind(desc_late_edu_gender,desc_late_edu_age,desc_late_edu_partner,desc_late_edu_child,
                    desc_late_edu_dualcit,desc_late_edu_hdi,desc_late_edu_kaufmann,desc_late_edu_eu,desc_late_edu_education)


############
# TABLE A5 #
############      

# Load main dataset, and late immigration cohorts with valid education cases
dataset_main <- read.csv(file.choose(),header=T,sep=";")
dataset_late_edu <- subset(dataset_main, IMMIGRATIONYEAR > 1999 & EDUCATION >= 1)

#compute the survival function
dataset_late_edu$surv_main <- Surv(dataset_late_edu$START, dataset_late_edu$STOP, dataset_late_edu$EVENT)

#Cox regression: Table A5
table_A5 <- coxph(surv_main ~ GENDER + AGEARRIVAL + as.factor(PARTNER) + CHILD + DUALCITREC + DEVELOPMENT2 + 
                    STABILITY2 + EU + as.factor(EDUCATION), dataset = dataset_late_edu)

############
# TABLE A6 #
############ 

# Table A6 is an exception to the rule that every piece of output uses the dataset "Data_cit_acq_main" as the starting point. Instead, this table uses "Data_cit_acq_rush". 
# This dataset contains slightly more observations, since specific time periods were inserted for individuals who did not originally have a transition on April 1 2002 and April 1 2003. 
# The dataset "Data_cit_acq_rush" can be found at the following location on the network of Statistics Netherlands: \\Ssb2f\ssb24\SatSocSam\MACIMIDE\DATA\Analyse\Paper_Citizenship_Acquisition 

#Upload the dataset (Data_cit_acq_rush.sav) to R
dataset_rush <- read.csv(file.choose(),header=T,sep=";")

#compute the survival function
dataset_rush$surv_main <- Surv(dataset_rush$START, dataset_rush$STOP, dataset_rush$EVENT)

#Cox regression: Table A5
table_A6 <- coxph(surv_main ~ GENDER + AGEARRIVAL + as.factor(PARTNER) + CHILD + DUALCITREC + 
                    DEVELOPMENT2 + STABILITY2 + EU + as.factor(IMMIGRATIONYEAR) + RUSH, dataset = dataset_rush)


##############     
# Figure A1a #
##############

#select cases early immigrationcohort and low stability origin country
dataset_main <- read.csv(file.choose(),header=T,sep=";")
dataset_early_low_kaufmann <- subset(dataset_main, STABILITY < -0.1769 & IMMIGRATIONYEAR < 1998)

#compute the survival function
dataset_early_low_kaufmann$surv_main <- Surv(dataset_early_low_kaufmann$START, dataset_early_low_kaufmann$STOP, dataset_early_low_kaufmann$EVENT)

#compute the Kaplan-Meier curve
dataset_early_low_kaufmann$kmA1.1 <- survfit(dataset_early_low_kaufmann$surv_main ~ 1)

#plot the Kaplan-Meier curve
figure_A1.1 <- plot(dataset_early_low_kaufmann$kmA1.1, ylim = c(.2, 1), xlim = c(156, 520), 
                   xlab = "Weeks of residence", ylab = "Proportion not naturalized", 
                   col = 2, lty = 1)

#select cases early immigrationcohort and high stability origin country
dataset_early_high_kaufmann <- subset(dataset_main, STABILITY >= -0.1769 & IMMIGRATIONYEAR < 1998)

#compute the survival function
dataset_early_high_kaufmann$surv_main <- Surv(dataset_early_high_kaufmann$START, dataset_early_high_kaufmann$STOP, dataset_early_high_kaufmann$EVENT)

#compute the Kaplan-Meier curve
dataset_early_high_kaufmann$km1.2 <- survfit(dataset_early_high_kaufmann$surv_main ~ 1)

#retain the previous plot
Par(new=T)

#plot the Kaplan-Meier curve
figure_A1.2 <- plot(dataset_early_high_kaufmann$km1.2, ylim = c(.2, 1), xlim = c(156, 520), 
                   xlab = "Weeks of residence", ylab = "Proportion not naturalized", 
                   col = 4, lty = 1)

#add legend
legend.txt_A1a <- c("High stability", "Low stability")
legend("topright", legend = legend.txt_A1a, col = c(4,2), lty = c(1,1))


##############     
# Figure A1b #
##############

#select cases late immigrationcohort and low stability origin country
dataset_main <- read.csv(file.choose(),header=T,sep=";")
dataset_late_low_kaufmann <- subset(dataset_main, STABILITY < -0.5443 & IMMIGRATIONYEAR > 1999)

#compute the survival function
dataset_late_low_kaufmann$surv_main <- Surv(dataset_late_low_kaufmann$START, dataset_late_low_kaufmann$STOP, dataset_late_low_kaufmann$EVENT)

#compute the Kaplan-Meier curve
dataset_late_low_kaufmann$kmA1.3 <- survfit(dataset_late_low_kaufmann$surv_main ~ 1)

#plot the Kaplan-Meier curve
figure_A1.3 <- plot(dataset_late_low_kaufmann$kmA1.3, ylim = c(.2, 1), xlim = c(156, 520), 
                    xlab = "Weeks of residence", ylab = "Proportion not naturalized", 
                    col = 2, lty = 1)

#select cases late immigrationcohort and high stability origin country
dataset_late_high_kaufmann <- subset(dataset_main, STABILITY >= -0.5443 & IMMIGRATIONYEAR > 1999)

#compute the survival function
dataset_late_high_kaufmann$surv_main <- Surv(dataset_late_high_kaufmann$START, dataset_late_high_kaufmann$STOP, dataset_late_high_kaufmann$EVENT)

#compute the Kaplan-Meier curve
dataset_late_high_kaufmann$km1.4 <- survfit(dataset_late_high_kaufmann$surv_main ~ 1)

#retain the previous plot
Par(new=T)

#plot the Kaplan-Meier curve
figure_A1.4 <- plot(dataset_late_high_kaufmann$km1.4, ylim = c(.2, 1), xlim = c(156, 520), 
                    xlab = "Weeks of residence", ylab = "Proportion not naturalized", 
                    col = 4, lty = 1)

#add legend
legend.txt_A1b <- c("High stability", "Low stability")
legend("topright", legend = legend.txt_A1b, col = c(4,2), lty = c(1,1))
